library(readxl)
library(readr)
library(mbar)
library(arules)
library(arulesViz)
library(magrittr)
library(dplyr)
library(lubridate)
library(forcats)
library(ggplot2)
# Load the raw retail line items from the Excel workbook.
mba_data <- read_excel(path = "online-retail.xlsx")

# Reshape with mbar_prep_data(), passing InvoiceNo and Description as the
# two column arguments, then preview the first rows of the result.
transactions <- mba_data %>%
  mbar_prep_data(InvoiceNo, Description)
head(transactions)
# Number of rows (line items) recorded for each invoice.
items <- mba_data %>%
  count(InvoiceNo, name = "count") %>%
  pull(count)

# Typical basket size: mean and median number of rows per invoice.
mean(items)
median(items)
# How often each product description appears, most frequent first.
mba_data %>%
  count(Description, name = "count", sort = TRUE)
# Per-invoice sum of UnitPrice.
# NOTE(review): this adds up UnitPrice only; if the sheet also carries a
# quantity column, revenue per order is presumably quantity * price -- confirm.
order_totals <- mba_data %>%
  group_by(InvoiceNo) %>%
  summarize(order_sum = sum(UnitPrice))

# Grand total over all invoices and the number of distinct invoices.
total_revenue <- sum(order_totals$order_sum)
total_transactions <- nrow(order_totals)

# Average value per invoice.
total_revenue / total_transactions
# Read the comma-separated basket file into an arules transactions object.
basket_data <- read.transactions(
  file = "transaction_data.csv",
  format = "basket",
  sep = ","
)

# Print the object, then its summary.
basket_data
summary(basket_data)

# Bar chart of the ten most frequent items, using absolute counts.
itemFrequencyPlot(basket_data, topN = 10, type = "absolute")
# Mining thresholds: minimum support 0.009, minimum confidence 0.8,
# and at most four items per rule.
rule_params <- list(
  support = 0.009,
  confidence = 0.8,
  target = "rules",
  maxlen = 4
)

# Mine association rules from the baskets and summarise the result.
rules <- apriori(basket_data, parameter = rule_params)
summary(rules)
# Order the mined rules from highest to lowest confidence.
basket_rules <- sort(rules, by = "confidence", decreasing = TRUE)

# Show the ten strongest rules. head() is used instead of [1:10] so the
# script still works when fewer than ten rules were mined.
inspect(head(basket_rules, n = 10))
# Flag rules whose item set contains the item set of another rule.
#
# The subset matrix is built from basket_rules against itself so that the
# resulting positions refer to basket_rules (the confidence-sorted set that
# is pruned below) rather than to the unsorted `rules` object.
# Entry [i, j] is TRUE when rule i's items are a subset of rule j's items;
# every rule is a subset of itself, so a column sum above 1 means some other
# rule is contained in rule j.
# NOTE(review): arules also ships is.redundant(), which applies a
# confidence-based definition of redundancy -- consider whether that is the
# intended criterion.
subset_matrix <- is.subset(basket_rules, basket_rules)
is_redundant <- unname(colSums(subset_matrix) > 1)

# Positions (within basket_rules) of the redundant rules.
rules_redundant <- which(is_redundant)
rules_redundant

# Drop redundant rules with a logical mask. Negative integer indexing is
# avoided because basket_rules[-integer(0)] would return an empty rule set
# when nothing is redundant.
rules_pruned <- basket_rules[!is_redundant]
inspect(rules_pruned)
# Rules that lead to SUGAR: SUGAR is fixed on the right-hand side and every
# other item may only appear on the left-hand side.
sugar_rules <- apriori(
  basket_data,
  parameter = list(supp = 0.009, conf = 0.8),
  appearance = list(default = "lhs", rhs = "SUGAR"),
  control = list(verbose = FALSE)  # FALSE, not F: F can be reassigned
)

# Strongest rules first.
rules_sugar <- sort(sugar_rules, by = "confidence", decreasing = TRUE)
inspect(rules_sugar)
# Rules that start from SUGAR: SUGAR is fixed on the left-hand side and every
# other item may only appear on the right-hand side.
# This reuses (overwrites) sugar_rules and rules_sugar.
sugar_rules <- apriori(
  basket_data,
  parameter = list(supp = 0.009, conf = 0.8),
  appearance = list(default = "rhs", lhs = "SUGAR"),
  control = list(verbose = FALSE)  # FALSE, not F: F can be reassigned
)

# Strongest rules first.
rules_sugar <- sort(sugar_rules, by = "confidence", decreasing = TRUE)
inspect(rules_sugar)
# Keep the first ten rules of basket_rules. head() is used instead of [1:10]
# so the script still works when fewer than ten rules exist.
top_rules <- head(basket_rules, n = 10)
inspect(top_rules)

# Default plot of the full rule set.
plot(basket_rules)

# Graph-method plot restricted to the top rules.
plot(top_rules, method = "graph")